from urllib.request import urlopen
from bs4 import BeautifulSoup
import html5lib
import datetime
import random
import re

# Hrefs already discovered by getLinks; shared across the whole crawl so that
# no link is printed or followed more than once.
pages = set()
def getLinks(pageUrl):
    """Crawl toutiao.com depth-first from ``pageUrl``, printing each new link.

    Fetches ``"https://www.toutiao.com" + pageUrl`` and collects every ``<a>``
    tag whose ``href`` matches ``^(/ch/move/)``. Each href that is not yet in
    the module-level ``pages`` set is printed, added to ``pages`` and then
    crawled itself before the next link of the current page is examined.

    Args:
        pageUrl: Path appended to ``https://www.toutiao.com``; ``""`` fetches
            the site root.

    Returns:
        None. Results are reported via ``print`` and accumulated in ``pages``.

    Raises:
        Nothing is caught here: any error raised by ``urlopen`` (for example
        ``urllib.error.URLError``) propagates to the caller and stops the
        crawl.
    """
    # Compile once per crawl rather than once per fetched page.
    link_pattern = re.compile("^(/ch/move/)")

    def fetch_hrefs(path):
        """Return the matching hrefs found on one page, in document order."""
        # The context manager closes the HTTP response as soon as the body
        # has been read, so a long crawl does not leak one socket per page.
        with urlopen("https://www.toutiao.com" + path) as response:
            markup = response.read()
        # Name the parser explicitly: without it BeautifulSoup guesses, warns,
        # and may parse differently depending on what is installed. This file
        # imports html5lib, so that is the parser used.
        soup = BeautifulSoup(markup, "html5lib")
        return [
            anchor.attrs['href']
            for anchor in soup.findAll("a", href=link_pattern)
            if 'href' in anchor.attrs
        ]

    # Explicit stack of per-page href iterators instead of recursion: the
    # visit order is the same pre-order walk, but crawl depth is no longer
    # bounded by the interpreter's recursion limit and only the href lists
    # (not whole parse trees) stay alive for ancestor pages.
    pending = [iter(fetch_hrefs(pageUrl))]
    while pending:
        for href in pending[-1]:
            if href in pages:
                continue
            print(href)
            pages.add(href)
            # Descend into the new page now; the current page's iterator
            # stays on the stack and resumes once the child is exhausted.
            pending.append(iter(fetch_hrefs(href)))
            break
        else:
            # Current page has no unseen links left: return to its parent.
            pending.pop()
# Kick off the crawl at the site root (empty path) when this module executes.
getLinks(pageUrl="")